#!/usr/bin/env python
# coding: utf-8

# In[12]:


"""Kmeans聚类"""

import pandas as pd
from sklearn.cluster import KMeans

# Load the training and test tables.
traindata = pd.read_csv(r'D:\\data\\1\\train.csv')
testdata = pd.read_csv(r'D:\\data\\1\\test.csv')

# Drop the 'CaseId' column, which is not meant to be used as a feature.
traindata = traindata.drop(columns=['CaseId'])
testdata = testdata.drop(columns=['CaseId'])

# Split the label column off the training set so that `traindata` holds
# features only; `y_train` is used later to score the clustering.
y_train = traindata['Evaluation']
traindata = traindata.drop(columns=['Evaluation'])

# Two clusters with k-means++ seeding.
# `n_jobs` is intentionally not passed: it was deprecated in scikit-learn 0.23
# and removed in 1.0, where passing it raises a TypeError.
# `random_state` is fixed so that re-running the cell yields the same clusters.
clf = KMeans(n_clusters=2, init='k-means++', random_state=0)

# K-means is unsupervised: fit() ignores any `y` argument, so only the
# features are passed. (The original call referenced an undefined name `y`.)
clf.fit(traindata)
y_pred = clf.predict(testdata)

# Save the predictions: read the existing Kmeans.csv from the data directory,
# overwrite its 'Evaluation' column with the predicted cluster ids, and write
# the result to Kmeans.csv in the current working directory.
# NOTE(review): cluster ids are arbitrary integers assigned by K-means; they are
# not guaranteed to line up with the values used in 'Evaluation' -- confirm.
submitData = pd.read_csv(r'D:\\data\\1\\Kmeans.csv')
submitData['Evaluation'] = y_pred
submitData.to_csv("Kmeans.csv", index=False)


# In[13]:


"""检测训练精确度"""

# Training-set accuracy: assign every training row to a cluster and compare
# that assignment with the row's true label.
# NOTE(review): K-means cluster ids are arbitrary, so this score is only
# meaningful if the ids happen to coincide with the label values -- confirm.
from sklearn.metrics import accuracy_score

y_train_pred = clf.predict(traindata)
print(y_train_pred)

acc_train = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print("acc_train = %f" % acc_train)


# In[ ]:




